691595264a59c8cf4f6b716376fd8a5cf374e119,src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java,ProtobufAnnotationSerializer,fromProto,#CoreNLPProtos.Document#,691

Before Change


      if (!tokens.isEmpty() && sentence.hasTokenOffsetBegin() && sentence.hasTokenOffsetEnd()) {
        // Set tokens for sentence
        int tokenBegin = Math.min(sentence.getTokenOffsetBegin(), tokens.size());
        int tokenEnd = Math.min(sentence.getTokenOffsetEnd(), tokens.size());
        map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
        // Set sentence index + token index + paragraph index
        for (int i = tokenBegin; i < tokenEnd; ++i) {

After Change


          map.get(TokensAnnotation.class) == null) {
        // Set tokens for sentence
        int tokenBegin = sentence.getTokenOffsetBegin();
        int tokenEnd = sentence.getTokenOffsetEnd();
        assert tokenBegin <= tokens.size() && tokenBegin <= tokenEnd;
        assert tokenEnd <= tokens.size();
        map.set(TokensAnnotation.class, tokens.subList(tokenBegin, tokenEnd));
        // Set sentence index + token index + paragraph index
        for (int i = tokenBegin; i < tokenEnd; ++i) {
          tokens.get(i).setSentIndex(sentIndex);
          tokens.get(i).setIndex(i - sentence.getTokenOffsetBegin() + 1);
          if (sentence.hasParagraph()) { tokens.get(i).set(ParagraphAnnotation.class, sentence.getParagraph()); }
        }
        // Set text
        int characterBegin = sentence.getCharacterOffsetBegin();
        int characterEnd = sentence.getCharacterOffsetEnd();
        if (characterEnd <= proto.getText().length()) {
          // The usual case -- get the text from the document text
          map.set(TextAnnotation.class, proto.getText().substring(characterBegin, characterEnd));
        } else {
          // The document text is wrong -- guess the text from the tokens
          map.set(TextAnnotation.class, recoverOriginalText(tokens.subList(tokenBegin, tokenEnd)));
        }
      }
      // End iteration